Packages

Load packages

require(pacman)
Caricamento del pacchetto richiesto: pacman
pacman::p_load(data.table, gdata, caTools, plotly, reshape2)
pacman::p_load_gh("luca-scr/smef")

Functions

# Return statistical mode of v
getmode <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

Globals

set.seed(100)

Dataset

Load dataset

car_data = fread("./data/car_price_train.csv")

Dataset info

head(car_data)
dim(car_data)
[1] 8938   18
smef::describe(car_data)
str(car_data)
Classes ‘data.table’ and 'data.frame':  8938 obs. of  18 variables:
 $ ID               : int  2969 4640 3292 10199 10150 2334 5859 8830 820 1294 ...
 $ Make             : chr  "Chevrolet" "Ford" "Nissan" "Toyota" ...
 $ Model            : chr  "Corvette" "F-250" "Cube" "Tacoma" ...
 $ Year             : int  2015 1999 2013 2016 2013 2006 2014 2016 2012 2015 ...
 $ Engine_Fuel_Type : chr  "premium_unleaded" "regular_unleaded" "regular_unleaded" "regular_unleaded" ...
 $ Engine_HP        : int  460 220 122 278 150 180 265 449 220 335 ...
 $ Engine_Cylinders : int  8 8 4 6 4 6 4 8 4 6 ...
 $ Transmission_Type: chr  "manual" "automatic" "automatic" "automatic" ...
 $ Driven_Wheels    : chr  "rear_wheel_drive" "four_wheel_drive" "front_wheel_drive" "four_wheel_drive" ...
 $ Number_of_Doors  : int  2 2 4 4 4 4 4 4 4 4 ...
 $ Market_Category  : chr  "High-Performance" "Unknown" "Unknown" "Unknown" ...
 $ Vehicle_Size     : chr  "Compact" "Large" "Compact" "Compact" ...
 $ Vehicle_Style    : chr  "Convertible" "Pickup" "Wagon" "Pickup" ...
 $ Highway_MPG      : int  29 15 31 23 33 24 25 26 29 30 ...
 $ City_MPG         : int  17 12 27 18 23 17 19 17 18 23 ...
 $ Age              : int  2 18 4 1 4 11 3 1 5 2 ...
 $ Popularity       : int  1385 5657 2009 2031 481 1851 640 617 376 3916 ...
 $ MSRP             : num  11.13 8.26 9.78 10.41 9.67 ...
 - attr(*, ".internal.selfref")=<externalptr> 

List of categorical features

cat_feat <- c('Make', 
              'Model',
              "Engine_Fuel_Type",
              'Transmission_Type', 
              'Driven_Wheels', 
              'Market_Category', 
              'Vehicle_Size', 
              'Vehicle_Style')

Unique values for categorical features

lapply(car_data[,cat_feat, with=F], unique)

MSRP distribution

hist(car_data[, MSRP], breaks = 50, main = "Histogram of MSRP", xlab = "MSRP")

Feature Engineering and Missing values

Count Unknown/unknown values for each categorical feature

lapply(car_data[,cat_feat, with=F], 
       function(col) sum(isUnknown(col, unknown = c("Unknown","unknown"))))
$Make
[1] 0

$Model
[1] 0

$Engine_Fuel_Type
[1] 3

$Transmission_Type
[1] 15

$Driven_Wheels
[1] 0

$Market_Category
[1] 2817

$Vehicle_Size
[1] 0

$Vehicle_Style
[1] 0

Drop Market Category column (too many Unknown values)

car_data[,Market_Category:=NULL]
head(car_data)

Drop ID column

car_data[,ID:=NULL]
head(car_data)

Drop Year column

car_data[,Year:=NULL]
head(car_data)

Replace Unknown/unknown values with NA

car_data <- car_data[, lapply(.SD, function(x) replace(x, which(x=="Unknown"), NA))]
car_data <- car_data[, lapply(.SD, function(x) replace(x, which(x=="unknown"), NA))]
sum(is.na(car_data))
[1] 18

Replace NA values for Engine_Fuel_type and Transmission_type with respective mode value

fuel_transmission_mode = getmode(car_data[,c("Engine_Fuel_Type", "Transmission_Type"), with=F])
car_data <- replace_na(car_data, replace = c(fuel_transmission_mode))
sum(is.na(car_data))

Split dataset into training and validation set

split <- sample.split(car_data$MSRP, SplitRatio = 0.7)
train_set <- subset(car_data, split == "TRUE")
dim(train_set)
[1] 6256   15
valid_set <- subset(car_data, split == "FALSE")
dim(valid_set)
[1] 2682   15

Models

Model 1.1: Random Forest

Fit the model using the whole training set

rf_reg = randomForest(x = train_set[, .SD, .SDcols = !'MSRP'],
                      y = train_set[, MSRP],
                      ntree = 10, 
                      nodesize = 1,
                      mtry = dim(train_set)[2] - 1,
                      importance = T)
rf_reg

Call:
 randomForest(x = train_set[, .SD, .SDcols = !"MSRP"], y = train_set[,      MSRP], ntree = 10, mtry = dim(train_set)[2] - 1, nodesize = 1,      importance = T) 
               Type of random forest: regression
                     Number of trees: 10
No. of variables tried at each split: 14

          Mean of squared residuals: 0.01928734
                    % Var explained: 98.41

Calculate RMSE on train set

y_pred = predict(rf_reg, newdata=train_set[, .SD, .SDcols = !'MSRP'])
rmse(train_set[, MSRP], y_pred)
[1] 0.06707272

Calculate RMSE on validation set

y_pred = predict(rf_reg, newdata = valid_set[, .SD, .SDcols = !'MSRP'])
rmse(valid_set[, MSRP], y_pred)
[1] 0.1328723

Visualize results

hist(valid_set[, MSRP], 
     breaks = 50, 
     col = rgb(0,0,1,1/4), 
     main = "Random Forest",
     xlab = "MSRP")
hist(y_pred, breaks = 50, col=rgb(1,0,0,1/4), add = T)
box()

Feature Importance (normalized)

feat_imp = randomForest::importance(rf_reg, type=2)
feat_imp <- scale(feat_imp, center=F, scale=colSums(feat_imp))
feat_imp
                  IncNodePurity
Make               0.0088028123
Model              0.0040847728
Engine_Fuel_Type   0.0043276962
Engine_HP          0.1940287592
Engine_Cylinders   0.0101458630
Transmission_Type  0.0032721459
Driven_Wheels      0.0012693471
Number_of_Doors    0.0007883784
Vehicle_Size       0.0022554922
Vehicle_Style      0.0031662083
Highway_MPG        0.0045307032
City_MPG           0.0086011284
Age                0.7494937472
Popularity         0.0052329458
attr(,"scaled:scale")
IncNodePurity 
     7648.789 

Top 10 Features

top_values = feat_imp[order(feat_imp[,1],decreasing=T),][1:10]
top_feat = rownames(feat_imp)[order(feat_imp[,1], decreasing=T)[1:10]]
barplot(height=top_values, names=top_feat, col="#69b3a2", las=2)

Select topmost numerical features as FEATURES of INTEREST (FOI)

foi = c('Age', 
        'Engine_HP', 
        'Engine_Cylinders', 
        'City_MPG', 
        'Highway_MPG', 
        'Popularity')

Model 1.2: Random Forest with Features of Interest

Prepare Dataset

train_set_foi = train_set[, append(foi, 'MSRP'), with=F]
head(train_set_foi)
valid_set_foi = valid_set[, append(foi, 'MSRP'), with=F]
head(valid_set_foi)

Fit the model

rf_reg_foi = randomForest(x = train_set_foi[, .SD, .SDcols = !'MSRP'],
                          y = train_set_foi[, MSRP],
                          ntree = 10, 
                          nodesize = 1,
                          mtry = dim(train_set_foi)[2] - 1)
rf_reg_foi

Call:
 randomForest(x = train_set_foi[, .SD, .SDcols = !"MSRP"], y = train_set_foi[,      MSRP], ntree = 10, mtry = dim(train_set_foi)[2] - 1, nodesize = 1) 
               Type of random forest: regression
                     Number of trees: 10
No. of variables tried at each split: 6

          Mean of squared residuals: 0.02395499
                    % Var explained: 98.02

Calculate RMSE on train set

y_pred = predict(rf_reg_foi, newdata = train_set_foi[, .SD, .SDcols = !'MSRP'])
rmse(train_set_foi[, MSRP], y_pred)
[1] 0.08181244

Calculate RMSE on validation set

y_pred = predict(rf_reg_foi, newdata = valid_set_foi[, .SD, .SDcols = !'MSRP'])
rmse(valid_set_foi[, MSRP], y_pred)
[1] 0.1430698

Visualize results

hist(valid_set[, MSRP], 
     breaks = 50, 
     col = rgb(0,0,1,1/4), 
     main = "Random Forest 2",
     xlab = "MSRP")
hist(y_pred, breaks = 50, col=rgb(1,0,0,1/4), add = T)
box()

Model 1.3: Tuned Random Forest with Features of Interest

Fit the model …

rf_reg_2 = train(MSRP ~ .,
                 data = train_set_foi,
                 method = 'rf',
                 tuneGrid = expand.grid(mtry = 1:(dim(train_set_foi)[2] - 1)),
                 ntree = 10,
                 nodesize = 5,
                 trControl = trainControl(method = 'cv',
                                          number = 10,
                                          selectionFunction = "oneSE"))
rf_reg_2
Random Forest 

6256 samples
   6 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 5631, 5629, 5631, 5632, 5630, 5631, ... 
Resampling results across tuning parameters:

  mtry  RMSE       Rsquared   MAE       
  1     0.2165607  0.9648372  0.15040352
  2     0.1528493  0.9805445  0.10170087
  3     0.1465201  0.9821106  0.09644810
  4     0.1459980  0.9821635  0.09700915
  5     0.1455816  0.9823451  0.09656957
  6     0.1482990  0.9816152  0.09781316

RMSE was used to select the optimal model using  the one SE rule.
The final value used for the model was mtry = 3.

… or load trained forest

load("models/m_1_3_tuned_random_forest.RData")
rf_reg_2
Random Forest 

6256 samples
   6 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 5631, 5629, 5631, 5632, 5630, 5631, ... 
Resampling results across tuning parameters:

  mtry  RMSE       Rsquared   MAE       
  1     0.2165607  0.9648372  0.15040352
  2     0.1528493  0.9805445  0.10170087
  3     0.1465201  0.9821106  0.09644810
  4     0.1459980  0.9821635  0.09700915
  5     0.1455816  0.9823451  0.09656957
  6     0.1482990  0.9816152  0.09781316

RMSE was used to select the optimal model using  the one
 SE rule.
The final value used for the model was mtry = 3.

Calculate RMSE on train set

y_pred = predict(rf_reg_2, newdata = train_set_foi[, .SD, .SDcols = !'MSRP'])
rmse(train_set_foi[, MSRP], y_pred)
[1] 0.08811463

Calculate RMSE on validation set

y_pred = predict(rf_reg_2, newdata = valid_set_foi[, .SD, .SDcols = !'MSRP'])
rmse(valid_set_foi[, MSRP], y_pred)
[1] 0.1357432

Visualize results

hist(valid_set[, MSRP], 
     breaks = 50, 
     col = rgb(0,0,1,1/4), 
     main = "Random Forest 3",
     xlab = "MSRP")
hist(y_pred, breaks = 50, col=rgb(1,0,0,1/4), add = T)
box()

Model 2: Neural Network

Scale dataset

X_train = train_set_foi[, .SD, .SDcols = !'MSRP']
Y_train = train_set_foi[, MSRP]
dataScaler = caret::preProcess(X_train, method = c("center", "scale"))
X_train_scaled = predict(dataScaler, X_train)
X_valid_scaled = predict(dataScaler, valid_set_foi)
smef::describe(X_train_scaled)
                  Obs Mean StdDev    Min Median    Max
Age              6256    0      1 -0.873 -0.610  2.677
Engine_HP        6256    0      1 -1.739 -0.207  6.982
Engine_Cylinders 6256    0      1 -3.182  0.210  5.864
City_MPG         6256    0      1 -1.353 -0.183 12.482
Highway_MPG      6256    0      1 -1.710 -0.183  9.925
Popularity       6256    0      1 -1.068 -0.114  2.835
smef::describe(X_valid_scaled)
                  Obs    Mean StdDev     Min  Median     Max
Age              2682 -0.0075 0.9822 -0.8728 -0.4784  2.6767
Engine_HP        2682  0.0000 1.0210 -1.8041 -0.2252  6.9820
Engine_Cylinders 2682  0.0038 1.0001 -3.1820  0.2102  5.8638
City_MPG         2682 -0.0078 0.8428 -1.2469 -0.1826 11.9502
Highway_MPG      2682  0.0184 1.1713 -1.7105 -0.0650 38.4863
Popularity       2682  0.0132 1.0150 -1.0682 -0.1136  2.8350
MSRP             2682 10.1225 1.1025  7.6009 10.3106 14.5411

Fit the model …

nn_reg = train(x = X_train_scaled,
               y = Y_train,
               method = "nnet",
               tuneGrid = expand.grid(decay = c(0.01, 0.1, 1), size = 64),
               linout = T,
               maxit = 100,
               trace = F,
               trControl = trainControl(method = "cv", 
                                       number = 10, 
                                       selectionFunction = "oneSE"))
nn_reg
Neural Network 

6256 samples
   6 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 5631, 5631, 5631, 5631, 5630, 5629, ... 
Resampling results across tuning parameters:

  decay  RMSE       Rsquared   MAE      
  0.01   0.2246159  0.9583626  0.1629258
  0.10   0.2273319  0.9572557  0.1627390
  1.00   0.2642568  0.9423997  0.1847960

Tuning parameter 'size' was held constant at a value of 64
RMSE was used to select the optimal model using  the one SE rule.
The final values used for the model were size = 64 and decay = 0.1.

… or load trained network

load("models/m_2_neural_network.RData")
print(nn_reg)
Neural Network 

6256 samples
   6 predictor

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 5631, 5631, 5631, 5631, 5630, 5629, ... 
Resampling results across tuning parameters:

  decay  RMSE       Rsquared   MAE      
  0.01   0.2246159  0.9583626  0.1629258
  0.10   0.2273319  0.9572557  0.1627390
 1.00  0.2642568 0.9423997 0.1847960

Tuning parameter 'size' was held constant at a value of 64
RMSE was used to select the optimal model using  the one
 SE rule.
The final values used for the model were size = 64 and decay
 = 0.1.

Calculate RMSE on train set

y_pred = predict(nn_reg, newdata=X_train_scaled)
rmse(train_set_foi[, MSRP], y_pred)
[1] 0.2168384

Calculate RMSE on validation set

y_pred = predict(nn_reg, newdata = X_valid_scaled)
rmse(valid_set_foi[, MSRP], y_pred)
[1] 0.2244429

Visualize results

hist(valid_set[, MSRP], 
     breaks = 50, 
     col = rgb(0,0,1,1/4), 
     main = "Neural Network",
     xlab = "MSRP")
hist(y_pred, breaks = 50, col=rgb(1,0,0,1/4), add = T)
box()

Model 3: Linear Regression

Plot Age, Engine_HP and MSRP

plot_ly(train_set, x = ~Age, y = ~Engine_HP, z = ~MSRP, size = 1)
No trace type specified:
  Based on info supplied, a 'scatter3d' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
No scatter3d mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
No trace type specified:
  Based on info supplied, a 'scatter3d' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
No scatter3d mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

Set an Age threshold

age_ths = 17

Linear Regression for ‘YOUNG’ cars

young_train_set = train_set[train_set[, Age < age_ths]]
young_valid_set = valid_set[valid_set[, Age < age_ths]]
young_lin_reg = lm(MSRP ~ Age + Engine_HP + Age:Engine_HP,
                   data = young_train_set)
summary(young_lin_reg)

Call:
lm(formula = MSRP ~ Age + Engine_HP + Age:Engine_HP, data = young_train_set)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.52031 -0.16691 -0.01932  0.14530  1.84610 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)    9.466e+00  1.402e-02  675.14   <2e-16 ***
Age           -5.094e-02  2.324e-03  -21.92   <2e-16 ***
Engine_HP      3.971e-03  4.862e-05   81.67   <2e-16 ***
Age:Engine_HP  2.002e-04  9.341e-06   21.43   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2834 on 5387 degrees of freedom
Multiple R-squared:  0.7681,    Adjusted R-squared:  0.7679 
F-statistic:  5946 on 3 and 5387 DF,  p-value: < 2.2e-16

Calculate RMSE on training and validation set

y_pred = predict(young_lin_reg, newdata = young_train_set)
rmse(young_train_set[, MSRP], y_pred)
[1] 0.2833367
y_pred = predict(young_lin_reg, newdata = young_valid_set)
rmse(young_valid_set[, MSRP], y_pred)
[1] 0.2772316

Visualize results

hist(young_valid_set[, MSRP], 
     breaks = 50, 
     col = rgb(0,0,1,1/4), 
     main = "Linear Regression (Young Cars)",
     xlab = "MSRP")
hist(y_pred, breaks = 50, col=rgb(1,0,0,1/4), add = T)
box()

Visualize model

graph_res <- 1
axis_x <- seq(0, age_ths, by = graph_res)
axis_y <- seq(min(young_train_set$Engine_HP), max(young_train_set$Engine_HP), by = graph_res)
reg_surface <- expand.grid(Age = axis_x, Engine_HP = axis_y, KEEP.OUT.ATTRS = F)
reg_surface$MSRP <- predict(young_lin_reg, newdata = reg_surface)
reg_surface <- acast(reg_surface, Engine_HP ~ Age, value.var = "MSRP")

young_plot <- plot_ly(young_train_set, x = ~Age, y = ~Engine_HP, z = ~MSRP, type = "scatter3d", size = 1, mode ="markers")
young_plot <- add_trace(young_plot, x = axis_x, y = axis_y, z = reg_surface, type = "surface")
young_plot
Avvertimento: 'surface' objects don't have these attributes: 'mode'
Valid attributes include:
'_deprecated', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'coloraxis', 'colorbar', 'colorscale', 'connectgaps', 'contours', 'customdata', 'customdatasrc', 'hidesurface', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'opacityscale', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'surfacecolor', 'surfacecolorsrc', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
Avvertimento: 'surface' objects don't have these attributes: 'mode'
Valid attributes include:
'_deprecated', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'coloraxis', 'colorbar', 'colorscale', 'connectgaps', 'contours', 'customdata', 'customdatasrc', 'hidesurface', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'opacityscale', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'surfacecolor', 'surfacecolorsrc', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

Linear Regression for ‘OLD’ cars

old_train_set = train_set[train_set[, Age >= age_ths]]
old_valid_set = valid_set[valid_set[, Age >= age_ths]]
old_lin_reg = lm(MSRP ~ Age + Age:Engine_HP + poly(Engine_HP, 2),
                   data = old_train_set)
summary(old_lin_reg)

Call:
lm(formula = MSRP ~ Age + Age:Engine_HP + poly(Engine_HP, 2), 
    data = old_train_set)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.59785 -0.09773 -0.01678  0.02318  2.71053 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)          8.383e+00  7.238e-02 115.814  < 2e-16 ***
Age                  2.495e-02  1.081e-02   2.309   0.0212 *  
poly(Engine_HP, 2)1  1.582e+01  2.264e+00   6.988 5.58e-12 ***
poly(Engine_HP, 2)2  1.685e+00  2.753e-01   6.120 1.42e-09 ***
Age:Engine_HP       -3.222e-04  6.107e-05  -5.276 1.67e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.269 on 860 degrees of freedom
Multiple R-squared:  0.349, Adjusted R-squared:  0.346 
F-statistic: 115.3 on 4 and 860 DF,  p-value: < 2.2e-16

Calculate RMSE on training and validation set

y_pred = predict(old_lin_reg, newdata = old_train_set)
rmse(old_train_set[, MSRP], y_pred)
[1] 0.2681941
y_pred = predict(old_lin_reg, newdata = old_valid_set)
rmse(old_valid_set[, MSRP], y_pred)
[1] 0.1849445

Visualize results

hist(old_valid_set[, MSRP], 
     breaks = 20, 
     col = rgb(0,0,1,1/4), 
     main = "Linear Regression (Old Cars)",
     xlab = "MSRP")
hist(y_pred, breaks = 20, col=rgb(1,0,0,1/4), add = T)
box()

Visualize model

graph_res <- 1
axis_x <- seq(age_ths, max(old_train_set$Age), by = graph_res)
axis_y <- seq(min(old_train_set$Engine_HP), max(old_train_set$Engine_HP), by = graph_res)
reg_surface <- expand.grid(Age = axis_x, Engine_HP = axis_y, KEEP.OUT.ATTRS = F)
reg_surface$MSRP <- predict(old_lin_reg, newdata = reg_surface)
reg_surface <- acast(reg_surface, Engine_HP ~ Age, value.var = "MSRP")

old_plot <- plot_ly(old_valid_set, x = ~Age, y = ~Engine_HP, z = ~MSRP, type = "scatter3d", size = 1, mode ="markers")
old_plot <- add_trace(old_plot, x = axis_x, y = axis_y, z = reg_surface, type = "surface")
old_plot
Avvertimento: 'surface' objects don't have these attributes: 'mode'
Valid attributes include:
'_deprecated', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'coloraxis', 'colorbar', 'colorscale', 'connectgaps', 'contours', 'customdata', 'customdatasrc', 'hidesurface', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'opacityscale', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'surfacecolor', 'surfacecolorsrc', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
Avvertimento: 'surface' objects don't have these attributes: 'mode'
Valid attributes include:
'_deprecated', 'autocolorscale', 'cauto', 'cmax', 'cmid', 'cmin', 'coloraxis', 'colorbar', 'colorscale', 'connectgaps', 'contours', 'customdata', 'customdatasrc', 'hidesurface', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'lighting', 'lightposition', 'meta', 'metasrc', 'name', 'opacity', 'opacityscale', 'reversescale', 'scene', 'showlegend', 'showscale', 'stream', 'surfacecolor', 'surfacecolorsrc', 'text', 'textsrc', 'type', 'uid', 'uirevision', 'visible', 'x', 'xcalendar', 'xhoverformat', 'xsrc', 'y', 'ycalendar', 'yhoverformat', 'ysrc', 'z', 'zcalendar', 'zhoverformat', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
#

Using only Engine_HP

old_lin_reg = lm(MSRP ~ poly(Engine_HP, 2),
                 data = old_train_set)
summary(old_lin_reg)

Call:
lm(formula = MSRP ~ poly(Engine_HP, 2), data = old_train_set)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.95194 -0.10852 -0.03088  0.01355  2.76294 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         7.752768   0.009711  798.37  < 2e-16 ***
poly(Engine_HP, 2)1 4.751969   0.285600   16.64  < 2e-16 ***
poly(Engine_HP, 2)2 1.639302   0.285600    5.74 1.31e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2856 on 862 degrees of freedom
Multiple R-squared:  0.2644,    Adjusted R-squared:  0.2627 
F-statistic: 154.9 on 2 and 862 DF,  p-value: < 2.2e-16

Calculate RMSE on training and validation set

y_pred = predict(old_lin_reg, newdata = old_train_set)
rmse(old_train_set[, MSRP], y_pred)
[1] 0.2851045
y_pred = predict(old_lin_reg, newdata = old_valid_set)
rmse(old_valid_set[, MSRP], y_pred)
[1] 0.2027388

Visualize model

ggplot(data = old_valid_set, aes(x = Engine_HP, y = MSRP)) + geom_point()

ggmatplot(x = old_valid_set$Engine_HP, y = y_pred, add = T, type = "l", col = "red")

Predict Test Set

Load test set

car_test = fread("./data/car_price_test.csv")

Check for missing values in Features of Interest

sum(is.na(car_test[,foi, with = F]))
[1] 0

MODEL 1.3 : Tuned Random Forest

y_pred_rf = predict(rf_reg_2, newdata=car_test[, foi, with=F])

Save as csv file

# write.csv(data.frame(ID = car_test$ID, Price = y_pred_rf),
#           row.names = FALSE,
#           file = "./results/Davide_Belfiori_submission1.csv")

MODEL 2 : Neural Network

car_test_scaled = predict(dataScaler, car_test[, foi, with=F])
y_pred_nn = predict(nn_reg, car_test_scaled)

Save as csv file

# write.csv(data.frame(ID = car_test$ID, Price = y_pred_nn),
#           row.names = FALSE,
#           file = "./results/Davide_Belfiori_submission2.csv")

MODEL 3 : Linear Regression

young_car_test = car_test[car_test[, Age < age_ths]]
y_pred_new = predict(young_lin_reg, newdata = young_car_test)
old_car_test = car_test[car_test[, Age >= age_ths]]
y_pred_old = predict(old_lin_reg, newdata = old_car_test)

Save as csv file

# write.csv(rbind(data.frame(ID = young_car_test$ID, Price = y_pred_new),
#                 data.frame(ID = old_car_test$ID, Price = y_pred_old)),
#           row.names = FALSE,
#           file = "./results/Davide_Belfiori_submission3.csv")

Compare results

hist(y_pred_rf, 
     breaks = 20, 
     col = rgb(0,0,1,1/4), 
     main = "Summary",
     xlab = "MSRP")
hist(y_pred_nn, breaks = 20, col=rgb(1,0,0,1/4), add = T)
hist(append(y_pred_new, y_pred_old), breaks = 20, col=rgb(0,1,0,1/4), add = T)
box()

---
title: "Car Price Prediction"
output: html_notebook
---

# Packages

Load packages

```{r}
# Load pacman itself, then use it to install/attach everything else.
# library() fails loudly if pacman is missing, whereas require() only warns.
library(pacman)
# randomForest, caret, nnet and ggplot2 are used later in this notebook but
# were previously only available if already attached in the session.
pacman::p_load(data.table, gdata, caTools, plotly, reshape2,
               randomForest, caret, nnet, ggplot2)
pacman::p_load_gh("luca-scr/smef")
```

# Functions

```{r}
# Return the statistical mode of v.
# Ties are broken by first appearance in v (which.max keeps the first maximum).
getmode <- function(v) {
  distinct_vals <- unique(v)
  counts <- tabulate(match(v, distinct_vals))
  distinct_vals[which.max(counts)]
}
```

# Globals

```{r}
# Fix the RNG seed so the train/validation split and model fits are reproducible
set.seed(100)
```

# Dataset

Load dataset

```{r}
# Read the training data as a data.table (fast CSV reader)
car_data = fread("./data/car_price_train.csv")
```

Dataset info

```{r}
# Peek at the first rows
head(car_data)
```

```{r}
# Number of rows and columns
dim(car_data)
```

```{r}
# Per-column summary statistics (smef helper)
smef::describe(car_data)
```

```{r}
# Column types and a preview of values
str(car_data)
```

List of categorical features

```{r}
# Names of the categorical (character-valued) columns in car_data
cat_feat <- c(
  "Make",
  "Model",
  "Engine_Fuel_Type",
  "Transmission_Type",
  "Driven_Wheels",
  "Market_Category",
  "Vehicle_Size",
  "Vehicle_Style"
)
```

Unique values for categorical features

```{r}
# Inspect the distinct levels of each categorical feature
lapply(car_data[,cat_feat, with=F], unique)
```

**MSRP** distribution

```{r}
# Distribution of the target variable (MSRP appears to be on a log-like scale
# given the values in str() above -- verify against the data source)
hist(car_data[, MSRP], breaks = 50, main = "Histogram of MSRP", xlab = "MSRP")
```

# Feature Engineering and Missing values

Count *Unknown/unknown* values for each categorical feature

```{r}
# Count how many "Unknown"/"unknown" placeholders each categorical column holds
# (isUnknown is presumably gdata's helper, attached above -- verify)
lapply(car_data[,cat_feat, with=F], 
       function(col) sum(isUnknown(col, unknown = c("Unknown","unknown"))))
```

Drop **Market Category** column (too many Unknown values)

```{r}
# Remove the column in place (data.table `:=` modifies by reference, no copy)
car_data[,Market_Category:=NULL]
head(car_data)
```

Drop **ID** column

```{r}
# The row identifier carries no predictive information; drop it in place
car_data[,ID:=NULL]
head(car_data)
```

Drop **Year** column

```{r}
# Year is redundant given the Age column; drop it in place
car_data[,Year:=NULL]
head(car_data)
```

Replace *Unknown/unknown* values with *NA*

```{r}
# Replace every "Unknown"/"unknown" entry with NA in a single pass over the
# columns (the previous version rebuilt the whole table twice, once per
# spelling; `%in%` checks both at the same time and never yields NA itself).
car_data <- car_data[, lapply(.SD, function(x) replace(x, which(x %in% c("Unknown", "unknown")), NA))]
sum(is.na(car_data))
```

Replace *NA* values for **Engine_Fuel_type** and **Transmission_type** with respective mode value

```{r}
# Impute the remaining NAs with each column's statistical mode.
# getmode() is written for atomic vectors, so compute the mode column by
# column instead of passing the whole two-column table in a single call
# (the previous call handed getmode a data.table, whose unique()/match()
# semantics operate on rows/columns rather than values).
fuel_transmission_mode = lapply(car_data[,c("Engine_Fuel_Type", "Transmission_Type"), with=F], getmode)
# NOTE(review): replace_na() is assumed to accept a named list of per-column
# replacement values -- confirm against the package that provides it.
car_data <- replace_na(car_data, replace = fuel_transmission_mode)
sum(is.na(car_data))
```

Split dataset into ***training*** and ***validation*** set

```{r}
# Stratified 70/30 split on the outcome (caTools keeps the MSRP distribution
# similar in both partitions). sample.split() returns a logical vector, so it
# indexes subset() directly -- no comparison against the string "TRUE" needed.
split <- sample.split(car_data$MSRP, SplitRatio = 0.7)
train_set <- subset(car_data, split)
dim(train_set)
valid_set <- subset(car_data, !split)
dim(valid_set)
```

# Models

### **Model 1.1**: Random Forest

Fit the model using the whole training set

```{r}
# Random forest with mtry = number of predictors, i.e. bagged regression
# trees: every split considers all features. ncol() replaces dim(...)[2] and
# TRUE replaces the reassignable shorthand T.
rf_reg = randomForest(x = train_set[, .SD, .SDcols = !'MSRP'],
                      y = train_set[, MSRP],
                      ntree = 10, 
                      nodesize = 1,
                      mtry = ncol(train_set) - 1,
                      importance = TRUE)
rf_reg
```

Calculate RMSE on train set

```{r}
# In-sample RMSE (optimistic; compare with the validation RMSE below).
# rmse() comes from an attached package, presumably smef -- verify.
y_pred = predict(rf_reg, newdata=train_set[, .SD, .SDcols = !'MSRP'])
rmse(train_set[, MSRP], y_pred)
```

Calculate RMSE on validation set

```{r}
# Out-of-sample RMSE on the held-out 30%
y_pred = predict(rf_reg, newdata = valid_set[, .SD, .SDcols = !'MSRP'])
rmse(valid_set[, MSRP], y_pred)
```

Visualize results

```{r}
# Observed validation MSRP (blue) overlaid with predictions (red)
hist(valid_set[, MSRP],
     breaks = 50,
     col = rgb(0, 0, 1, 0.25),
     main = "Random Forest",
     xlab = "MSRP")
hist(y_pred, breaks = 50, col = rgb(1, 0, 0, 0.25), add = TRUE)
box()
```

**Feature Importance** (normalized)

```{r}
# IncNodePurity importance (type = 2), then rescaled so the column sums to 1,
# giving each feature a share of the total node-purity gain
feat_imp = randomForest::importance(rf_reg, type=2)
feat_imp <- scale(feat_imp, center=F, scale=colSums(feat_imp))
feat_imp
```

Top 10 Features

```{r}
# Rank features by normalized importance and keep the ten largest.
# Compute the ordering once and reuse it, so the plotted values and their
# labels are guaranteed to stay aligned (previously order() ran twice).
top_idx = order(feat_imp[, 1], decreasing = TRUE)[1:10]
top_values = feat_imp[top_idx, 1]
top_feat = rownames(feat_imp)[top_idx]
barplot(height = top_values, names = top_feat, col = "#69b3a2", las = 2)
```

Select topmost numerical features as ***FEATURES of INTEREST*** (FOI)

```{r}
# Numerical features of interest, ordered roughly by importance rank above
foi = c("Age",
        "Engine_HP",
        "Engine_Cylinders",
        "City_MPG",
        "Highway_MPG",
        "Popularity")
```

### **Model 1.2**: Random Forest with Features of Interest

Prepare Dataset

```{r}
# Restrict both partitions to the features of interest plus the target column
train_set_foi = train_set[, append(foi, 'MSRP'), with=F]
head(train_set_foi)
valid_set_foi = valid_set[, append(foi, 'MSRP'), with=F]
head(valid_set_foi)
```

Fit the model

```{r}
# Same bagged-forest setup as Model 1.1, restricted to the features of
# interest; ncol() replaces the dim(...)[2] indexing.
rf_reg_foi = randomForest(x = train_set_foi[, .SD, .SDcols = !'MSRP'],
                          y = train_set_foi[, MSRP],
                          ntree = 10, 
                          nodesize = 1,
                          mtry = ncol(train_set_foi) - 1)
rf_reg_foi
```

Calculate RMSE on train set

```{r}
# In-sample RMSE for the reduced-feature forest
y_pred = predict(rf_reg_foi, newdata = train_set_foi[, .SD, .SDcols = !'MSRP'])
rmse(train_set_foi[, MSRP], y_pred)
```

Calculate RMSE on validation set

```{r}
# Out-of-sample RMSE for the reduced-feature forest
y_pred = predict(rf_reg_foi, newdata = valid_set_foi[, .SD, .SDcols = !'MSRP'])
rmse(valid_set_foi[, MSRP], y_pred)
```

Visualize results

```{r}
# Observed validation MSRP (blue) overlaid with predictions (red)
hist(valid_set[, MSRP],
     breaks = 50,
     col = rgb(0, 0, 1, 0.25),
     main = "Random Forest 2",
     xlab = "MSRP")
hist(y_pred, breaks = 50, col = rgb(1, 0, 0, 0.25), add = TRUE)
box()

```

### **Model 1.3**: Tuned Random Forest with Features of Interest

Fit the model ...

```{r}
# Grid-search mtry over 1..#predictors with 10-fold CV; the "oneSE" rule
# picks the simplest model within one standard error of the best RMSE.
rf_reg_2 = train(MSRP ~ .,
                 data = train_set_foi,
                 method = "rf",
                 tuneGrid = expand.grid(mtry = 1:(ncol(train_set_foi) - 1)),
                 ntree = 10,
                 nodesize = 5,
                 trControl = trainControl(method = "cv",
                                          number = 10,
                                          selectionFunction = "oneSE"))
rf_reg_2
```

... or load trained forest

```{r}
# Shortcut: restore a previously trained caret model object named rf_reg_2.
# NOTE(review): this silently overwrites the model fitted in the chunk above;
# run either that chunk or this one, not both.
load("models/m_1_3_tuned_random_forest.RData")
rf_reg_2
```

Calculate RMSE on train set

```{r}
# In-sample RMSE of the tuned forest
y_pred = predict(rf_reg_2, newdata = train_set_foi[, .SD, .SDcols = !'MSRP'])
rmse(train_set_foi[, MSRP], y_pred)
```

Calculate RMSE on validation set

```{r}
# Out-of-sample RMSE of the tuned forest
y_pred = predict(rf_reg_2, newdata = valid_set_foi[, .SD, .SDcols = !'MSRP'])
rmse(valid_set_foi[, MSRP], y_pred)
```

Visualize results

```{r}
# Observed validation MSRP (blue) overlaid with predictions (red)
hist(valid_set[, MSRP],
     breaks = 50,
     col = rgb(0, 0, 1, 0.25),
     main = "Random Forest 3",
     xlab = "MSRP")
hist(y_pred, breaks = 50, col = rgb(1, 0, 0, 0.25), add = TRUE)
box()
```

### **Model 2**: Neural Network

***Scale*** dataset

```{r}
# Center/scale the predictors using statistics estimated on the TRAINING set
# only, then apply the same transform to the validation predictors.
X_train = train_set_foi[, .SD, .SDcols = !'MSRP']
Y_train = train_set_foi[, MSRP]
dataScaler = caret::preProcess(X_train, method = c("center", "scale"))
X_train_scaled = predict(dataScaler, X_train)
# Drop MSRP before transforming so the validation design matrix has exactly
# the same columns as X_train_scaled (previously the raw target column was
# carried along inside X_valid_scaled).
X_valid_scaled = predict(dataScaler, valid_set_foi[, .SD, .SDcols = !'MSRP'])
smef::describe(X_train_scaled)
smef::describe(X_valid_scaled)
```

Fit the model ...

```{r}
# Single-hidden-layer network (64 units); only the weight decay is tuned.
# linout = TRUE keeps the output unit linear (regression, not classification).
nn_reg = train(x = X_train_scaled,
               y = Y_train,
               method = "nnet",
               tuneGrid = expand.grid(decay = c(0.01, 0.1, 1), size = 64),
               linout = TRUE,
               maxit = 100,
               trace = FALSE,
               trControl = trainControl(method = "cv",
                                        number = 10,
                                        selectionFunction = "oneSE"))
nn_reg
```

... or load trained network

```{r}
# Restore the previously trained network (object `nn_reg`) instead of refitting.
load(file = "models/m_2_neural_network.RData")
nn_reg
```

Calculate RMSE on train set

```{r}
# In-sample error of the network on the already-scaled training features.
y_pred = predict(nn_reg, newdata = X_train_scaled)
rmse(train_set_foi$MSRP, y_pred)
```

Calculate RMSE on validation set

```{r}
# Out-of-sample error of the network on the scaled validation features.
y_pred = predict(nn_reg, newdata = X_valid_scaled)
rmse(valid_set_foi$MSRP, y_pred)
```

Visualize results

```{r}
# Overlay the network's predicted MSRP (red) on the observed distribution (blue).
hist(valid_set[, MSRP],
     breaks = 50,
     col = rgb(0, 0, 1, 1/4),
     main = "Neural Network",
     xlab = "MSRP")
# TRUE, not T: T is an ordinary reassignable binding
hist(y_pred, breaks = 50, col = rgb(1, 0, 0, 1/4), add = TRUE)
box()
```

### **Model 3**: Linear Regression

Plot **Age**, **Engine_HP** and **MSRP**

```{r}
plot_ly(train_set, x = ~Age, y = ~Engine_HP, z = ~MSRP, size = 1)
```

Set an **Age** threshold

```{r}
age_ths = 17
```

Linear Regression for '*YOUNG*' cars

```{r}
# Split off the 'young' cars and fit a linear model with an Age x HP interaction.
# Idiomatic data.table row filtering: DT[Age < x] instead of DT[DT[, Age < x]].
young_train_set = train_set[Age < age_ths]
young_valid_set = valid_set[Age < age_ths]
young_lin_reg = lm(MSRP ~ Age + Engine_HP + Age:Engine_HP,
                   data = young_train_set)
summary(young_lin_reg)
```

Calculate RMSE on training and validation set

```{r}
# RMSE of the young-car model on both splits; y_pred keeps the validation
# predictions for the histogram below.
pred_train = predict(young_lin_reg, newdata = young_train_set)
rmse(young_train_set$MSRP, pred_train)
y_pred = predict(young_lin_reg, newdata = young_valid_set)
rmse(young_valid_set$MSRP, y_pred)
```

Visualize results

```{r}
# Overlay predicted MSRP (red) on the observed young-car distribution (blue).
hist(young_valid_set[, MSRP],
     breaks = 50,
     col = rgb(0, 0, 1, 1/4),
     main = "Linear Regression (Young Cars)",
     xlab = "MSRP")
# TRUE, not T: T is an ordinary reassignable binding
hist(y_pred, breaks = 50, col = rgb(1, 0, 0, 1/4), add = TRUE)
box()
```

Visualize model

```{r}
# Evaluate the fitted model on a regular (Age, Engine_HP) grid and overlay it
# as a surface on the young-car training scatter.
graph_res <- 1
axis_x <- seq(0, age_ths, by = graph_res)
axis_y <- seq(min(young_train_set$Engine_HP), max(young_train_set$Engine_HP),
              by = graph_res)
# FALSE, not F: F is an ordinary reassignable binding
reg_surface <- expand.grid(Age = axis_x, Engine_HP = axis_y,
                           KEEP.OUT.ATTRS = FALSE)
reg_surface$MSRP <- predict(young_lin_reg, newdata = reg_surface)
# acast: rows = Engine_HP, cols = Age, matching plotly's surface orientation
reg_surface <- acast(reg_surface, Engine_HP ~ Age, value.var = "MSRP")

young_plot <- plot_ly(young_train_set, x = ~Age, y = ~Engine_HP, z = ~MSRP,
                      type = "scatter3d", size = 1, mode = "markers")
young_plot <- add_trace(young_plot, x = axis_x, y = axis_y, z = reg_surface,
                        type = "surface")
young_plot
```

Linear Regression for '*OLD*' cars

```{r}
# Split off the 'old' cars and fit a model with an interaction term plus a
# quadratic in Engine_HP.
# Idiomatic data.table row filtering: DT[Age >= x] instead of DT[DT[, Age >= x]].
old_train_set = train_set[Age >= age_ths]
old_valid_set = valid_set[Age >= age_ths]
old_lin_reg = lm(MSRP ~ Age + Age:Engine_HP + poly(Engine_HP, 2),
                 data = old_train_set)
summary(old_lin_reg)
```

Calculate RMSE on training and validation set

```{r}
# RMSE of the old-car model on both splits; y_pred keeps the validation
# predictions for the histogram below.
pred_train = predict(old_lin_reg, newdata = old_train_set)
rmse(old_train_set$MSRP, pred_train)
y_pred = predict(old_lin_reg, newdata = old_valid_set)
rmse(old_valid_set$MSRP, y_pred)
```

Visualize results

```{r}
# Overlay predicted MSRP (red) on the observed old-car distribution (blue).
hist(old_valid_set[, MSRP],
     breaks = 20,
     col = rgb(0, 0, 1, 1/4),
     main = "Linear Regression (Old Cars)",
     xlab = "MSRP")
# TRUE, not T: T is an ordinary reassignable binding
hist(y_pred, breaks = 20, col = rgb(1, 0, 0, 1/4), add = TRUE)
box()
```

Visualize model

```{r}
# Evaluate the old-car model on a regular (Age, Engine_HP) grid and overlay it
# as a surface on the scatter.
# NOTE(review): the scatter here uses old_valid_set while the young-car plot
# used its training set — presumably intentional, but worth confirming.
graph_res <- 1
axis_x <- seq(age_ths, max(old_train_set$Age), by = graph_res)
axis_y <- seq(min(old_train_set$Engine_HP), max(old_train_set$Engine_HP),
              by = graph_res)
# FALSE, not F: F is an ordinary reassignable binding
reg_surface <- expand.grid(Age = axis_x, Engine_HP = axis_y,
                           KEEP.OUT.ATTRS = FALSE)
reg_surface$MSRP <- predict(old_lin_reg, newdata = reg_surface)
# acast: rows = Engine_HP, cols = Age, matching plotly's surface orientation
reg_surface <- acast(reg_surface, Engine_HP ~ Age, value.var = "MSRP")

old_plot <- plot_ly(old_valid_set, x = ~Age, y = ~Engine_HP, z = ~MSRP,
                    type = "scatter3d", size = 1, mode = "markers")
old_plot <- add_trace(old_plot, x = axis_x, y = axis_y, z = reg_surface,
                      type = "surface")
old_plot
```

Using only **Engine_HP**

```{r}
# Refit the old-car model dropping Age: a quadratic in Engine_HP alone.
# NOTE: this overwrites the previous `old_lin_reg`.
old_lin_reg = lm(MSRP ~ poly(Engine_HP, 2), data = old_train_set)
summary(old_lin_reg)
```

Calculate RMSE on training and validation set

```{r}
# RMSE of the HP-only old-car model on both splits; y_pred keeps the
# validation predictions for the plot below.
pred_train = predict(old_lin_reg, newdata = old_train_set)
rmse(old_train_set$MSRP, pred_train)
y_pred = predict(old_lin_reg, newdata = old_valid_set)
rmse(old_valid_set$MSRP, y_pred)
```

Visualize model

```{r}
# Fix: ggmatplot() builds a standalone ggplot and has no `add` argument, so the
# red fit curve was never overlaid on the scatter. Draw both layers in a single
# ggplot, sorting the fitted values by Engine_HP so the curve runs left-to-right.
fit_curve = data.frame(Engine_HP = old_valid_set$Engine_HP, MSRP = y_pred)
fit_curve = fit_curve[order(fit_curve$Engine_HP), ]
ggplot(data = old_valid_set, aes(x = Engine_HP, y = MSRP)) +
  geom_point() +
  geom_line(data = fit_curve, colour = "red")
```

# Predict Test Set

Load test set

```{r}
car_test = fread("./data/car_price_test.csv")
```

Check for missing values in Features of Interest

```{r}
sum(is.na(car_test[,foi, with = F]))
```

**MODEL 1.3** : Tuned Random Forest

```{r}
y_pred_rf = predict(rf_reg_2, newdata=car_test[, foi, with=F])
```

Save as csv file

```{r}
# Kept commented out — presumably so re-knitting does not regenerate the
# submission file; uncomment to write it.
# write.csv(data.frame(ID = car_test$ID, Price = y_pred_rf),
#           row.names = FALSE,
#           file = "./results/Davide_Belfiori_submission1.csv")
```

**MODEL 2** : Neural Network

```{r}
# Apply the training-set scaler to the test features, then predict with the net.
# NOTE(review): assumes `foi` matches the columns the scaler was fitted on — confirm.
test_features = car_test[, foi, with = FALSE]
car_test_scaled = predict(dataScaler, test_features)
y_pred_nn = predict(nn_reg, car_test_scaled)
```

Save as csv file

```{r}
# Kept commented out — presumably so re-knitting does not regenerate the
# submission file; uncomment to write it.
# write.csv(data.frame(ID = car_test$ID, Price = y_pred_nn),
#           row.names = FALSE,
#           file = "./results/Davide_Belfiori_submission2.csv")
```

**MODEL 3** : Linear Regression

```{r}
# Route each test car to its age-segment model.
# Idiomatic data.table row filtering: DT[cond] instead of DT[DT[, cond]].
young_car_test = car_test[Age < age_ths]
y_pred_new = predict(young_lin_reg, newdata = young_car_test)  # young-car predictions
old_car_test = car_test[Age >= age_ths]
y_pred_old = predict(old_lin_reg, newdata = old_car_test)
```


Save as csv file

```{r}
# Kept commented out — presumably so re-knitting does not regenerate the
# submission file; uncomment to write it.
# write.csv(rbind(data.frame(ID = young_car_test$ID, Price = y_pred_new),
#                 data.frame(ID = old_car_test$ID, Price = y_pred_old)),
#           row.names = FALSE,
#           file = "./results/Davide_Belfiori_submission3.csv")
```

Compare results

```{r}
# Compare the three models' predicted price distributions on the test set:
# blue = random forest, red = neural network, green = segmented linear model.
hist(y_pred_rf,
     breaks = 20,
     col = rgb(0, 0, 1, 1/4),
     main = "Summary",
     xlab = "MSRP")
# TRUE, not T: T is an ordinary reassignable binding
hist(y_pred_nn, breaks = 20, col = rgb(1, 0, 0, 1/4), add = TRUE)
# c() is the idiomatic concatenation of two vectors (append() is for insertion)
hist(c(y_pred_new, y_pred_old), breaks = 20, col = rgb(0, 1, 0, 1/4), add = TRUE)
box()
```
